import os
from pathlib import Path

# Path configuration
# BASE_DIR is the directory two levels above this file's resolved location;
# every other path below is derived from it.
BASE_DIR = Path(__file__).resolve().parent.parent
NLTK_DATA_PATH = BASE_DIR.joinpath('nltk_data')
EPUB_PATH = BASE_DIR.joinpath('books', 'AliceInWonderland.epub')
KNOWN_WORDS_PATH = BASE_DIR.joinpath('data', 'known_words.xlsx')
OUTPUT_PATH = BASE_DIR.joinpath('output', 'vocabulary.xlsx')

# Processing parameters
# Word-length bounds; presumably used to filter candidate words, and whether
# the bounds are inclusive depends on the consumer -- TODO confirm.
MIN_WORD_LENGTH = 4
MAX_WORD_LENGTH = 20
# Markup identifiers for chapter titles.
# NOTE(review): 'div.chapter-title' is written as a CSS-style selector rather
# than a bare tag name -- confirm the consumer handles selector syntax.
CHAPTER_TITLE_TAGS = [
    'h1',
    'h2',
    'h3',
    'div.chapter-title',
]

# NLTK configuration
# Maps each NLTK package name to a category/name resource path.
# Presumably the key is the download identifier and the value is the path used
# to look the resource up in the NLTK data directory -- TODO confirm.
NLTK_PACKAGES = {
    'wordnet': 'corpora/wordnet',
    'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
    'punkt': 'tokenizers/punkt',
}

# Special-case handling rules
# Maps a specific token to a replacement string; presumably applied to tokens
# the normal processing would otherwise reject or mangle -- TODO confirm.
SPECIAL_ALLOW = {"o'clock": 'clock'}